# Draw one comparison graph per label. Frames are matched on their label
# rather than on list position, so a label that is present in only one of the
# two data sets cannot shift the pairing. Empty frames are skipped because
# draw_graph reads the label from the first row of the frame it is given.
# NOTE(review): pair1, pair2 and draw_graph are defined further down in this
# file, so this loop only works once those statements have been executed.
frames_by_label = {
    frame["label"].iloc[0]: frame for frame in pair2 if not frame.empty
}
for frame in pair1:
    if frame.empty:
        continue
    counterpart = frames_by_label.get(frame["label"].iloc[0])
    if counterpart is not None:
        draw_graph(frame, counterpart)
# !pip install pyhmy --upgrade
import json
import pandas as pd
import os
import shutil
import re
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from IPython.core.display import display, HTML
def read_data(files, path):
    """Collect the parsed ``"log-topic":"ds"`` records from zerolog files.

    Args:
        files: File names to consider; names that do not contain
            ``"zerolog"`` are skipped.
        path: Directory the files live in, with or without a trailing
            path separator.

    Returns:
        A list with one decoded JSON value per matching line, in the order
        the files and their lines were read.

    Raises:
        json.JSONDecodeError: If a matching line is not valid JSON.
    """
    data = []
    for file in files:
        if "zerolog" not in file:
            continue
        # os.path.join keeps the file path correct even when ``path`` is
        # passed without a trailing separator.
        with open(os.path.join(path, file), errors='ignore') as f:
            # Iterate the file object directly so a large log is streamed
            # instead of being loaded into memory as a list of lines.
            for line in f:
                if '"log-topic":"ds"' in line:
                    data.append(json.loads(line))
    return data
def data_processing(data):
    """Build a time-sorted DataFrame of log records with a ``label`` column.

    Args:
        data: Records (e.g. the dicts returned by ``read_data``) providing
            ``time`` and ``message`` fields; other fields are dropped.

    Returns:
        A DataFrame with columns ``time`` (datetime), ``message`` (stripped
        of surrounding whitespace) and ``label`` (the text between ``ds-``
        and the following ``-`` in the message, or NaN when the message has
        no such part), sorted by time and re-indexed from 0.

    Raises:
        ValueError: If a ``time`` value does not match the
            ``%Y-%m-%dT%H:%M:%S.%f`` format.
    """
    df = pd.DataFrame(data, columns=['time', 'message'])
    df["message"] = df["message"].str.strip()
    # str.extract yields NaN for messages without a "ds-<label>-" part instead
    # of failing on an empty match list.
    df['label'] = df['message'].str.extract(r'ds-(.*?)-', expand=False)
    df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%dT%H:%M:%S.%f')
    # Ties on time are ordered by message in descending order, which places a
    # "...-start" message ahead of the "...-end" message with the same prefix.
    df = df.sort_values(by=['time', 'message'], ascending=[True, False])
    return df.reset_index(drop=True)
def get_time_diff(df):
    """Compute, per label, the elapsed seconds recorded on each end message.

    For every ``label`` group the time difference between consecutive rows is
    stored in a ``time_diff`` column, and only the rows whose message equals
    ``"ds-<label>-end"`` are kept.

    Args:
        df: DataFrame with ``time`` (datetime), ``message`` and ``label``
            columns. It is not modified.

    Returns:
        A list with one DataFrame per label (in ``groupby`` order), each
        re-indexed from 0 and carrying the extra ``time_diff`` column in
        seconds.
    """
    pair = []
    for label, group in df.groupby('label'):
        # Work on a copy so the new column is not written into a slice of df.
        group = group.copy()
        # total_seconds() gives the whole duration; ``.dt.microseconds`` is
        # only the sub-second component and drops every full second.
        group['time_diff'] = group['time'].diff().dt.total_seconds()
        ends = group[group['message'] == 'ds-' + label + "-end"]
        pair.append(ends.reset_index(drop=True))
    return pair
def _processing_time_trace(frame, name, color):
    """Build the line trace of ``time_diff`` against ``time`` for one frame."""
    return go.Scatter(
        x=frame["time"],
        y=frame["time_diff"],
        mode='lines',
        name=name,
        line_color=color,
        hovertemplate="processing time: %{y}<br>" +
                      "UTC Time: %{x}<br>" +
                      "<extra></extra>",
    )


def draw_graph(new, new2, name_suffix="3/19", name_suffix2="3/20"):
    """Plot, show and save the processing-time comparison of two frames.

    The label is read from the first row of ``new`` and is used as the graph
    title and as the prefix of the output file names. The output directories
    come from the module-level ``html_dir`` and ``fig_dir`` and are created
    when missing.

    Args:
        new: DataFrame with ``time``, ``time_diff`` and ``label`` columns;
            must have at least one row.
        new2: DataFrame with ``time`` and ``time_diff`` columns, drawn as
            the second line.
        name_suffix: Text appended to the label in the legend entry of
            ``new``.
        name_suffix2: Text appended to the label in the legend entry of
            ``new2``.
    """
    label = new.iloc[0].label
    html_name = label + "_processing_time_comparison.html"
    html_path = "https://harmony-one.github.io/harmony-log-analysis/" + \
        html_dir.replace("../../docs/", "") + html_name
    png_path = fig_dir

    print("png graphs saved in " + png_path)
    print('html graphs saved in ')
    display(HTML("<a href='" + html_path + "' target='_blank'>" + html_path + "</a>"))

    # exist_ok avoids the separate existence check before creating each folder.
    os.makedirs(html_dir, exist_ok=True)
    os.makedirs(png_path, exist_ok=True)

    data = [
        _processing_time_trace(new, label + " " + name_suffix, "#00AEE9"),
        _processing_time_trace(new2, label + " " + name_suffix2, "#FFA07A"),
    ]
    layout = go.Layout(
        title=label,
        yaxis=dict(
            title='Processing Time/ seconds'
        ),
        legend_orientation="h"
    )
    fig = go.Figure(data=data, layout=layout)
    fig.show()
    fig.write_html(html_dir + html_name)
    fig.write_image(png_path + label + "_processing_time_comparison.png", width=1000, height=500)
# Input log folder of the first data set and output folders for the graphs.
log_dir_1 = "../../logs/node_logs/ostn_03_19/s0-leader/"
fig_dir = "../../graphs/processing_time/ostn_03_20/s0-leader/"
html_dir = "../../docs/graphs/processing_time/ostn_03_20/s0-leader/"

# First data set: read the logs and build the sorted, labelled frame.
files_1 = os.listdir(log_dir_1)
data_1 = read_data(files_1, log_dir_1)
df_1 = data_processing(data_1)

# Second data set.
log_dir_2 = "../../logs/node_logs/ostn_03_20/s0-leader/"
files_2 = os.listdir(log_dir_2)
data_2 = read_data(files_2, log_dir_2)
df_2 = data_processing(data_2)


def _number_at(message, position):
    """Return the standalone integer at ``position`` in ``message``, or NaN.

    NaN is returned when the message holds fewer than ``position + 1``
    standalone integers, so a message with a single number no longer fails
    when the second one is requested.
    """
    numbers = re.findall(r'\b\d+\b', message)
    return int(numbers[position]) if position < len(numbers) else np.nan


# The first standalone number of each message goes to 'epoch', the second to
# 'block'.
df_2['epoch'] = df_2['message'].apply(lambda c: _number_at(c, 0))
df_2['block'] = df_2['message'].apply(lambda c: _number_at(c, 1))

# Remove every digit from the messages of the second data set. get_time_diff
# compares messages with the exact text "ds-<label>-end".
digit_pattern = '[0-9]'
df_2['message'] = df_2['message'].apply(lambda c: re.sub(digit_pattern, '', c).strip())

pair1 = get_time_diff(df_1)
pair2 = get_time_diff(df_2)